Looking at whiff data for left-handed pitchers (35 pitchers from 2022, all with 1842+ seasonal pitches).
library(tidyverse)
library(readxl)
library(knitr)
library(ranger)
library(glmnet)
library(forcats)
library(olsrr)
library(Metrics)
library(mgcv)
library(caret)
# Geom Zone (Jackie's) ####
geom_zone <- function(top = 11/3, bottom = 3/2, linecolor = "black"){
geom_rect(xmin = -.7083, xmax = .7083, ymin = bottom, ymax = top,
alpha = 0, color = linecolor, linewidth = 0.75)
}
# c(0, 0, -.25, -.5, -.25))
geom_plate <- function(pov = "pitcher"){
df <- case_when(
pov == "pitcher" ~
data.frame(x = c(-.7083, .7083, .7083 ,0, -.7083), y = c(0, 0, .25, .5, .25)),
pov == "catcher" ~
data.frame(x = c(-.7083, .7083, .7083 ,0, -.7083), y = c(0, 0, -.25, -.5, -.25))
)
g <- geom_polygon(data = df, aes(x = x, y = y), fill = "white", color = "black", linewidth = 1.25)
g
}
LHP <- read_csv("CSVs/lhp_pitches.csv") %>%
select(-...1) %>%
filter(!is.na(pitch_type)) %>%
mutate(pitch_type = str_replace(pitch_type, "CS", "CU"),
pitch_name = str_replace(pitch_name, "Slow Curve", "Curveball"),
pitch_type = str_replace(pitch_type, "KC", "CU"),
pitch_name = str_replace(pitch_name, "Knuckle Curve", "Curveball"))
whiff <- LHP %>%
mutate(whiff = description == "swinging_strike",
whiff = as.character(whiff)) %>%
filter(pitch_type != "NA",
pitch_type != "PO")
# Pitch Speed
whiff %>%
ggplot(aes(y = whiff, x = pitch_speed, color = pitch_name)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~pitch_name) +
labs(title = "Whiff vs. Non-Whiff by Pitch Speed",
x = "Pitch Speed (mph)",
y = "Outcome",
color = "Pitch Type") +
NULL
# Horizontal Movement
whiff %>%
ggplot(aes(y = whiff, x = pfx_x*12, color = pitch_name)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~pitch_name) +
labs(title = "Whiff vs. Non-Whiff by Horizontal Movement",
x = "Horizontal Movement (in.)",
y = "Outcome",
color = "Pitch Type") +
NULL
# Vertical Movement
whiff %>%
ggplot(aes(y = whiff, x = pfx_z*12, color = pitch_name)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~pitch_name) +
labs(title = "Whiff vs. Non-Whiff by Vertical Movement",
x = "Induced Vertical Movement (in.)",
y = "Outcome",
color = "Pitch Type") +
NULL
# Horizontal Pitch Location
whiff %>%
ggplot(aes(y = whiff, x = plate_x, color = pitch_name)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~pitch_name) +
labs(title = "Whiff vs. Non-Whiff by Horizontal Pitch Location",
x = "Horizontal Pitch Location (ft)",
y = "Outcome",
color = "Pitch Type") +
NULL
# Vertical Pitch Location
whiff %>%
ggplot(aes(y = whiff, x = plate_z, color = pitch_name)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~pitch_name) +
labs(title = "Whiff vs. Non-Whiff by Vertical Pitch Location",
x = "Vertical Pitch Location (ft)",
y = "Outcome",
color = "Pitch Type") +
NULL
# Spin Rate
whiff %>%
ggplot(aes(y = whiff, x = release_spin_rate, color = pitch_name)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~pitch_name) +
labs(title = "Whiff vs. Non-Whiff by Spin Rate",
x = "Spin Rate (rpm)",
y = "Outcome",
color = "Pitch Type") +
NULL
# Slider Logistic Model
whiff_sl <- whiff %>%
filter(pitch_type == "SL") %>%
mutate(whiff = str_replace(whiff, "TRUE", "1"),
whiff = str_replace(whiff, "FALSE", "0"),
whiff = as.numeric(whiff))
# Original Model
model1 <- glm(whiff ~ pitch_speed + spin_axis + pfx_x + pfx_z + plate_x + plate_z +
release_spin_rate + speed_change + break_change + pfx_total + distance,
data = whiff_sl, family = binomial)
# Reduced Model
model1 <- glm(whiff ~ pitch_speed + plate_x + plate_z +
release_spin_rate + speed_change + break_change + pfx_total + distance,
data = whiff_sl, family = binomial)
summary(model1)
##
## Call:
## glm(formula = whiff ~ pitch_speed + plate_x + plate_z + release_spin_rate +
## speed_change + break_change + pfx_total + distance, family = binomial,
## data = whiff_sl)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.3855810 0.9352388 -6.828 8.63e-12 ***
## pitch_speed 0.0510083 0.0105993 4.812 1.49e-06 ***
## plate_x -0.9124614 0.0605877 -15.060 < 2e-16 ***
## plate_z 1.0431027 0.0714570 14.598 < 2e-16 ***
## release_spin_rate 0.0002201 0.0001268 1.737 0.0825 .
## speed_change -0.0074160 0.0234142 -0.317 0.7514
## break_change 0.1752853 0.2048408 0.856 0.3922
## pfx_total -0.1349373 0.1222709 -1.104 0.2698
## distance -1.8789629 0.0888621 -21.145 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 11359 on 13558 degrees of freedom
## Residual deviance: 10591 on 13550 degrees of freedom
## (98 observations deleted due to missingness)
## AIC: 10609
##
## Number of Fisher Scoring iterations: 6
preds <- whiff_sl %>%
mutate(prediction_log = predict(model1, whiff_sl),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
preds %>%
ggplot(aes(x = as.character(whiff), y = prediction)) +
geom_boxplot() +
geom_jitter(alpha = 0.1, width = 0.1, height = 0)
preds %>%
mutate(prediction = round(prediction, 2)) %>%
group_by(prediction) %>%
summarize(mean(whiff)) %>%
as.data.frame() %>%
ggplot(aes(x = prediction, y = `mean(whiff)`)) +
geom_point() +
geom_smooth(se = FALSE) +
labs(y = "observed whiff proportion",
x = "projected % whiff chance",
title = "Whiff proportion by predicted whiff value",
subtitle = "Whiff predictions have a 1% bin width")
whiff %>%
mutate(count = paste0(balls, "-", strikes)) %>%
filter(pitch_type == "SL") %>%
ggplot(aes(y = whiff, x = pfx_z*12)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~count) +
labs(title = "Whiff vs. Non-Whiff by Vertical Movement",
x = "Induced Vertical Movement (in.)",
y = "Outcome") +
NULL
# Sliders
whiff %>%
filter(pitch_type =="SL") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(hitter), rows = vars(whiff)) +
theme_bw()
# Fastballs
whiff %>%
filter(pitch_type =="FF") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(hitter), rows = vars(whiff)) +
theme_bw()
# Change-Ups
whiff %>%
filter(pitch_type =="CH") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(hitter), rows = vars(whiff)) +
theme_bw()
whiff %>%
arrange(game_date, player_name, at_bat_number, pitch_number) %>%
mutate(prev_pitch = lag(pitch_type, n = 1, default = NA)) %>%
filter(hitter == "R",
pitch_type == "SL",
prev_pitch %in% c("FF", "CH", "SL", "CU"),
player_name == "Fried, Max") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
coord_fixed() +
facet_grid(cols = vars(prev_pitch), rows = vars(whiff)) +
theme_bw()
whiff %>%
arrange(game_date, player_name, at_bat_number, pitch_number) %>%
mutate(prev_pitch = lag(pitch_type, n = 1, default = NA)) %>%
filter(hitter == "R",
pitch_type == "FF") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
coord_fixed() +
facet_grid(cols = vars(player_name), rows = vars(whiff)) +
theme_bw()
zoned <- whiff %>%
mutate(loc_x = round(plate_x*3, 0),
loc_y = round(plate_z*3, 0))
zoned %>%
filter(pitch_type == "FF",
plate_z > 0 & plate_z < 6,
plate_x > -1.5 & plate_x < 1.5) %>%
summarize(whiff_perc = mean(whiff == "TRUE"),
pitches = n(),
.by = c(loc_x, loc_y, player_name)) %>%
filter(pitches >= 10) %>%
ggplot(aes(x = -loc_x, y = loc_y, fill = whiff_perc)) +
geom_tile() +
scale_fill_gradient(low = "gray", high = "red") +
facet_wrap(~ player_name) +
coord_fixed() +
theme_bw()
LHP2 <- LHP %>%
mutate(wOBAr = case_when(
woba >= 0.360 ~ 6,
woba >= 0.335 ~ 5,
woba >= 0.310 ~ 4,
woba >= 0.285 ~ 3,
woba >= 0.260 ~ 2,
woba >= 0.235 ~ 1)) %>%
mutate(wOBAr = as.factor(wOBAr))
LHP2 %>%
ggplot(aes(x = wOBAr, pitch_speed, color = wOBAr)) +
geom_boxplot() +
facet_wrap(~ pitch_name)
LHP2 %>%
ggplot(aes(x = estimated_woba_using_speedangle, pitch_speed)) +
geom_point(alpha = 0.2) +
facet_wrap(~ pitch_name)